{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/higgs/anaconda2/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import os\n",
    "import sys\n",
    "sys.path.append('./bert')\n",
    "import numpy as np\n",
    "import transformer\n",
    "import embedding_layer\n",
    "import model_utils\n",
    "from collections import defaultdict\n",
    "import collections\n",
    "import summary_model\n",
    "import lm_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['vocab_size',\n",
       " 'num_hidden_layers',\n",
       " 'default_batch_size',\n",
       " 'label_smoothing',\n",
       " 'default_batch_size_tpu',\n",
       " 'filter_size',\n",
       " 'static_batch',\n",
       " 'learning_rate_decay_rate',\n",
       " 'alpha',\n",
       " 'allow_ffn_pad',\n",
       " 'num_types',\n",
       " 'use_tpu',\n",
       " 'attention_dropout',\n",
       " 'layer_postprocess_dropout',\n",
       " 'relu_dropout',\n",
       " 'learning_rate_warmup_steps',\n",
       " 'optimizer_adam_beta1',\n",
       " 'optimizer_adam_beta2',\n",
       " 'num_heads',\n",
       " 'beam_size',\n",
       " 'max_length',\n",
       " 'extra_decode_length',\n",
       " 'initializer_gain',\n",
       " 'initializer_range',\n",
       " 'hidden_size',\n",
       " 'optimizer_adam_epsilon',\n",
       " 'learning_rate']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_len = 64\n",
    "inp = tf.placeholder(tf.int32, shape=[None, max_len], name='inp')\n",
    "inp_len = tf.placeholder(tf.int32, shape=[None], name='inp_len')\n",
    "config = summary_model.BASE_PARAMS\n",
    "config.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "config['batch_size'] = 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "32003"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "config['vocab_size']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "200"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "config['max_length']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tokenization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = tokenization.FullTokenizer('./model/vocab.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21128"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(tokenizer.vocab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "config['vocab_size'] = len(tokenizer.vocab)\n",
    "config['max_length'] = 64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "transformer_m = transformer.Transformer(params=config, train=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "attention_bias = model_utils.get_decoder_self_attention_bias(config['max_length'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "encoder_outputs = transformer_m.encode(inp, attention_bias)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "logits = transformer_m.embedding_softmax_layer.linear(encoder_outputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Tensor 'presoftmax_linear/Reshape_1:0' shape=(?, ?, 21128) dtype=float32>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "logits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "weights = tf.sequence_mask(inp_len, config['max_length'], dtype=tf.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Tensor 'SequenceMask/Cast_1:0' shape=(?, 64) dtype=int32>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss = model_utils.soft_cross_entropy_loss(logits, inp, config['label_smoothing'], config['vocab_size'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss = tf.to_float(weights)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss = tf.reduce_sum(loss, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss = loss/tf.to_float(inp_len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_loss = tf.reduce_mean(loss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# class LMModel:\n",
    "#     def __init__(self, config, max_len):\n",
    "#         self.config = config\n",
    "#         self.max_len = max_len\n",
    "#         self.inp = tf.placeholder(tf.int32, shape=[None, max_len], name='inp')\n",
    "#         self.inp_len = tf.placeholder(tf.int32, shape=[None], name='inp_len')\n",
    "    \n",
    "#     def loss(self, is_training):\n",
    "#         self.transformer = transformer.Transformer(self.config, is_training)\n",
    "#         self.attention_bias = model_utils.get_decoder_self_attention_bias(self.max_len)\n",
    "#         encoder_outputs = self.transformer.encode(self.inp, self.attention_bias)\n",
    "#         logits = self.transformer.embedding_softmax_layer.linear(encoder_outputs)\n",
    "#         loss = model_utils.soft_cross_entropy_loss(logits, self.inp, self.config['label_smoothing'], self.config['vocab_size'])\n",
    "#         weights = tf.sequence_mask(self.inp_len, self.max_len, dtype=tf.int32)\n",
    "#         loss = loss * tf.to_float(weights)\n",
    "#         loss = tf.reduce_sum(loss, axis=1)\n",
    "#         loss = loss/tf.to_float(self.inp_len)\n",
    "#         loss = tf.reduce_mean(loss)\n",
    "#         return loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "lm = lm_model.LMModel(config, config['max_length'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss = lm.loss(True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
